Newer
Older
Digital_Repository / Repositories / Maps / Otago Eprints / Eclipse_Projects / Eprints_FINAL / View_Based / Simulation_View.pl
#!/usr/bin/env perl
# Perl Script to generate bars representing the volume of access of the
# Otago_eprints archive by geographical location
# Author Nigel Stanger
# Altered by Hayden Kane November 2007
use strict;
use DBI;
use Geo::IP;
use POSIX qw(floor);
use CGI;

# Settings used to connect to the MySQL statistics database.
my $dsn       = 'DBI:mysql:database=eprintstats;host=127.0.0.1';
my $user_name = 'eprintstatspub';
my $password  = 'public';

# Database/statement handles and working storage shared by the rest of
# the script.
my $connect;
my $query;
my $stat;
my $row;
my $num_rows;
my $vtype;
my ( %types, %unmapped );

# GeoIP handle and the path of the city-level geolocation database file.
my $gi;
my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Longitude adjustment lookup table. The size of a degree of longitude
# shrinks as we approach the pole, so we need to scale the size of the
# bars that we draw to compensate. The values are derived from simple
# trigonometry. One-degree latitude intervals are probably overkill, but
# they were easy to generate and are probably also easier to process (we
# just take the absolute value and round to the nearest integer).
#
# The table holds 91 multipliers, indexed by whole degrees of absolute
# latitude from 0 to 90. latitude_adjust() multiplies the requested angle
# by the entry selected by the rounded absolute latitude. The last two
# entries (indices 89 and 90) are identical, so the multiplier stops
# growing at 89 degrees.
# NOTE(review): the values look like approximations of 1/cos(latitude);
# confirm before regenerating the table.
my (@long_adjust) = (
					  1,           1.000149936, 1.000625031, 1.001401156,
					  1.002504822, 1.003938193, 1.005527222, 1.007577665,
					  1.009814805, 1.012548385, 1.015451362, 1.018863602,
					  1.022455549, 1.026414998, 1.030723667, 1.035416774,
					  1.040342076, 1.045806114, 1.051493576, 1.057774136,
					  1.064328263, 1.071308119, 1.07855449,  1.086459634,
					  1.094691064, 1.103413101, 1.112646298, 1.122444401,
					  1.132640933, 1.143416279, 1.154798315, 1.166647234,
					  1.179193306, 1.192438327, 1.206420497, 1.220995149,
					  1.236155295, 1.252205744, 1.269161249, 1.286871805,
					  1.30559452,  1.325133265, 1.34585379,  1.367512898,
					  1.390218486, 1.414290258, 1.439883436, 1.466366234,
					  1.494622451, 1.52427924,  1.555922715, 1.589286423,
					  1.624573794, 1.661877673, 1.701368815, 1.743769606,
					  1.788337802, 1.836254359, 1.887432209, 1.94210986,
					  2.000049973, 2.062828574, 2.130469499, 2.203424356,
					  2.281552845, 2.366264633, 2.459472746, 2.560324974,
					  2.669801881, 2.7913935,   2.924802689, 3.072783109,
					  3.236797412, 3.421061629, 3.629874841, 3.865462623,
					  4.133753357, 4.448482828, 4.811613369, 5.242729893,
					  5.759533746, 6.395493768, 7.190621631, 8.209846154,
					  9.579463858, 11.47777459, 14.35030477, 19.13145315,
					  28.71090387, 57.42180775, 57.42180775,
);

# Miscellaneous variables.
my ( %cities, %IPs );

# Maximum number of rows to fetch; a value below 1 means "no LIMIT clause"
# when the query is built.
my ($num_entries) = -1;
my ($num_hits)    = 0;
my ( $ip, $count, $location, $country, $year, $month );
my ( $lat, $long, $city, $key ) = ( 0, 0, '', '' );
my ( $maxcolour, $red, $blue );
$unmapped{'download'} = $unmapped{'abstract'} = 0;

# Bounding box of the area to report on, read from the CGI parameter BBOX
# as "west,south,east,north". Only locations strictly inside this box are
# plotted.
my $cgi   = CGI->new;
my $param = $cgi->param('BBOX');
my @BBOX  = defined($param) ? split( /,/, $param ) : ();

# A missing or malformed BBOX would leave the bounds undefined; undefined
# bounds compare as 0 and silently exclude every location. Fall back to the
# whole world instead so that a request without a usable BBOX still
# produces a meaningful document.
my $number_re = qr/^\s*[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?\s*$/;
if ( @BBOX != 4 || grep { $_ !~ $number_re } @BBOX ) {
	@BBOX = ( -180, -90, 180, 90 );
}
my ( $west, $south, $east, $north ) = @BBOX;

# A degree of latitude is always of constant length, so define it globally.
# The length of a degree of longitude changes depending on the latitude.
my ($latsize) = 0.075;

# Open the GeoIP database that maps IP addresses to locations; give up
# immediately if it cannot be opened.
$gi = Geo::IP->open( $gidb, GEOIP_STANDARD );
die "Unable to open GeoIP database $gidb\n" unless $gi;

# Connect to the MySQL database. RaiseError makes DBI die on any database
# error, so no further error checks are needed below.
$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

# Only restrict the number of rows when a positive limit has been set.
my $limit_clause = '';
$limit_clause = " LIMIT $num_entries" if ( $num_entries > 0 );

# One row per (ip, view type, country, year, month) with the number of
# requests for the otago_eprints archive, in chronological order.
$query =
"SELECT ip, view_type, country_name, year(request_date) as year, month(request_date) as month, COUNT(*) AS count 
FROM view 
WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%'
GROUP BY ip, view_type, country_name, year, month
ORDER BY year, month"
  . $limit_clause;

# Run the query and remember how many rows the driver reports.
$stat = $connect->prepare($query);
$stat->execute();
$num_rows = $stat->rows;

# Process the query results and emit the KML document.
#
# The document is always emitted, even when the query returns no rows, so
# that the client receives a well-formed (empty) KML response rather than no
# CGI output at all. Rows are consumed by fetching them instead of trusting
# the row count reported after execute(), which DBI does not guarantee for
# SELECT statements.
$num_entries = $num_rows if ( $num_rows > 0 && $num_entries < 1 );

# Loop to process each of the query results
while ( $row = $stat->fetchrow_hashref() ) {

	# extracting values from query result
	$ip      = $row->{'ip'};
	$count   = $row->{'count'};
	$vtype   = $row->{'view_type'};
	$year    = $row->{'year'};
	$country = $row->{'country_name'};

	# change the country value to New Zealand if either of the below
	# alternatives appear as the value for country
	$country = 'New Zealand'
	  if (    ( $country eq 'Otago Intranet' )
		   || ( $country eq 'Repository Admin' ) );

	# Month must be in the form mm to work correctly. Sprintf is used to
	# ensure this, ie January is represented as 01
	$month = sprintf( "%02d", $row->{'month'} );
	$IPs{$ip} = 1;

	# Get the location of the archive use by ip address
	$location = $gi->record_by_addr($ip);

	if ( !defined($location) ) {

		# To keep count of the unmapped accesses of the otago_eprints
		# Due to the limitations in the GEO Database that is being used
		# GeoLiteCity is said to be over 98% accurate on a country level
		# and 70% accurate on a city level
		# Not used in this program but is here for future development if
		# necessary
		$unmapped{$vtype} += $count;
		next;
	}

	# Only locations strictly inside the requested bounding box are plotted.
	$lat  = $location->latitude;
	$long = $location->longitude;
	next
	  unless (    ( $lat > $south && $lat < $north )
			   && ( $long > $west && $long < $east ) );

	# Build the name and the grouping key. Keys start with the year and
	# month so that sorting them yields chronological output; keys for
	# locations without a city name start with '!' so that they can be
	# separated from the identified ones later.
	my $city_name = $location->city;
	if ( !defined($city_name) || $city_name eq '' ) {
		$city = $country . ' (unidentified)';
		$key  = sprintf( "!%s %s %s (%f, %f)",
						$year, $month, $country, $lat, $long );
	} else {
		$city = $city_name;
		$key  = sprintf( "%s %s %s %s", $year, $month, $city, $country );
	}

	# If there are multiple points for the same city in the same country
	# (and month), we accumulate the lats and longs for these points and
	# keep a track of how many points there are in total, so that we can
	# work out an average latlong for the city. Note that this assumes
	# that each city name only exists once within a country, i.e., it
	# will break if there are multiple cities with the same name in the
	# same country!
	if ( !defined( $cities{$key} ) ) {
		$cities{$key}{'name'}       = $city;
		$cities{$key}{'year'}       = $year;
		$cities{$key}{'month'}      = $month;
		$cities{$key}{'lat'}        = 0;
		$cities{$key}{'long'}       = 0;
		$cities{$key}{'abstract'}   = 0;
		$cities{$key}{'download'}   = 0;
		$cities{$key}{'num_points'} = 0;
	}
	$cities{$key}{'lat'}  += $lat;
	$cities{$key}{'long'} += $long;
	$cities{$key}{$vtype} += $count;
	$cities{$key}{'num_points'}++;
}

# Average the location of multiple points for the same city. Every query
# row that contributed to an entry counts once, regardless of its hit count.
foreach $city ( keys %cities ) {
	$cities{$city}{'lat'} =
	  $cities{$city}{'lat'} / $cities{$city}{'num_points'};
	$cities{$city}{'long'} =
	  $cities{$city}{'long'} / $cities{$city}{'num_points'};
}

# Print the header so that CGI will work; necessary so that the output is
# recognised as KML.
# NOTE: need to set up web server (Apache or IIS) to recognise this MIME type
print "Content-Type: application/vnd.google-earth.kml+xml\n\n";

# Print out the KML file
# NOTE The kml tag has been set up so that validation can occur against the
#      published schema to assist debugging. However need to write the file
#      to an .xml file for it to validate in eclipse, not necessary for
#      actual .kml output
print '<?xml version="1.0"?>
<kml xmlns="http://earth.google.com/kml/2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://earth.google.com/kml/2.1 KMLSchema.xsd">
	<Folder>
		<name>Simulation of access over time</name>
		<description>Select this folder to display a simulation of the volume of access over time from which the repository has been accessed</description>
		<Folder>
			<name>Volume (known locations)</name>
			<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only known locations are displayed.</description>';

# Plot volume bars for known locations as a separate folder so that
# we can turn them on and off as a group.
foreach $city ( sort( keys(%cities) ) ) {

	# Exclude unknown locations because they mess up the display somewhat.
	next if ( $city =~ /^!/ );
	make_city_bars( $cities{$city} );
}
print '
		</Folder>
		<Folder>
			<name>Volume (unknown locations)</name>
			<visibility>1</visibility>
			<description>The volume of abstract views (blue) and document downloads (red), represented as a bar chart. Each 1,000 metres of height represents one abstract view or document download. Only unknown locations are displayed.</description>';

# Plot volume bars for unknown locations as a separate folder so that
# we can turn them on and off as a group.
foreach $city ( sort( keys(%cities) ) ) {

	# Include unknown locations only.
	next if ( $city !~ /^!/ );
	make_city_bars( $cities{$city} );
}
print '
		</Folder>
	</Folder>
</kml>
';

# Close off connection to the database
$stat->finish();
$connect->disconnect();

# Emit the download bar followed by the abstract bar for one %cities entry.
# Using a single helper for both folders guarantees that each bar is drawn
# with the count that belongs to its own view type.
sub make_city_bars {
	my ($entry) = @_;
	foreach my $type ( 'download', 'abstract' ) {
		make_sim(
				  $entry->{'name'}, $entry->{$type},
				  $entry->{'lat'},  $entry->{'long'},
				  $type,            $entry->{'year'},
				  $entry->{'month'}
		);
	}
}

# Print one KML <Placemark> holding an extruded bar for a single location,
# view type and month.
#
# Arguments:
#   $name   - display name of the location (escaped for XML here)
#   $count  - number of views; the bar is $count * 1000 high
#   $lat    - latitude of the location
#   $long   - longitude of the location
#   $type   - 'download' (red bar, drawn west of $long) or 'abstract'
#             (blue bar, drawn from $long eastwards)
#   $year   - year in which the bar is shown
#   $month  - month (1-12) in which the bar is shown
#
# The bar is visible from the start of the given month until the start of
# the following month. Nothing is returned; the KML is written to STDOUT.
sub make_sim {
	my ( $name, $count, $lat, $long, $type, $year, $month ) = @_;

	# The name comes from external data and is written as XML character
	# data, so the XML metacharacters must be escaped ('&' first, so that
	# the entities produced below are not escaped again).
	$name = '' unless defined($name);
	$name =~ s/&/&amp;/g;
	$name =~ s/</&lt;/g;
	$name =~ s/>/&gt;/g;

	# KML expects yyyy-mm, so make sure the month has two digits even if
	# the caller passed a plain number.
	$month = sprintf( "%02d", $month );

	# The time span ends at the following month, rolling over into the
	# next year after December.
	my ($endmonth) = $month + 1;
	my ($endyear)  = $year;
	if ( $endmonth == 13 ) {
		$endyear += 1;
		$endmonth = 1;
	}
	$endmonth = sprintf( "%02d", $endmonth );

	# Work out the longitudinal width of the bar adjusted for the latitude.
	# The latitude size is constant.
	my ($longsize) = latitude_adjust( $lat, $latsize * 2 );

	# A download bar starts $longsize degrees to the west of $long.
	# An abstract bar starts at $long.
	$long -= $longsize if ( $type eq 'download' );

	# Colour used in the description text, and the matching polygon colour.
	# KML colours are written aabbggrr, so 'c0ff0000' is blue and
	# 'c00000ff' is red.
	my $css_colour = ( $type eq 'download' ) ? 'red' : 'blue';
	my $kml_colour = 'c0'
	  . ( ( $type eq 'abstract' ) ? 'ff' : '00' ) . '00'
	  . ( ( $type eq 'download' ) ? 'ff' : '00' );

	# Description text, e.g. "1 download" or "12 abstracts".
	my $label = $count . ' ' . $type . ( ( $count != 1 ) ? 's' : '' );

	# Corners of the bar's footprint, listed as a closed ring (the first
	# point is repeated at the end), all at the bar's height.
	my $height    = $count * 1000;
	my $bar_south = $lat - $latsize;
	my $bar_north = $lat + $latsize;
	my $bar_east  = $long + $longsize;
	my @ring = (
				 "$long,$bar_south,$height",
				 "$long,$bar_north,$height",
				 "$bar_east,$bar_north,$height",
				 "$bar_east,$bar_south,$height",
				 "$long,$bar_south,$height",
	);

	print '
			<Placemark>
				<name>' . $name . '</name>
				<visibility>0</visibility>
				<description>
						<![CDATA[<span style="color:' . $css_colour . ';">' . $label . '</span>]]>
				</description>
				<LookAt>
					<longitude>' . $long . '</longitude>
					<latitude>' . $lat . '</latitude>
				<range>60000</range>
					<tilt>45</tilt>
					<heading>3</heading>
				</LookAt>
				<TimeSpan>
					<begin>' . $year . '-' . $month . '</begin>
					<end>' . $endyear . '-' . $endmonth . '</end>
				</TimeSpan>
				<Style>
					<PolyStyle>
						<color>' . $kml_colour . '</color>
					</PolyStyle>
				</Style>
				<Polygon>
					<extrude>1</extrude>
					<altitudeMode>relativeToGround</altitudeMode>
					<outerBoundaryIs>
						<LinearRing>
							<coordinates>
									' . join( "\n\t\t\t\t\t\t\t\t\t", @ring ) . '
							</coordinates>
						</LinearRing>
					</outerBoundaryIs>
				</Polygon>
			</Placemark>';
}

# Scale an angular width so that a bar drawn at the given latitude is
# displayed accurately on Google Earth.
#
# The latitude's absolute value is rounded to a whole number of degrees and
# used as an index into @long_adjust; the matching multiplier is applied to
# $angle and the product is returned.
sub latitude_adjust {
	my ( $lat, $angle ) = @_;
	my $degrees = round( abs($lat) );
	my $factor  = $long_adjust[$degrees];
	return $factor * $angle;
}

# Round a number to the nearest whole number; halves are rounded away from
# zero (2.5 becomes 3, -2.5 becomes -3).
sub round {
	my ($value) = @_;

	# Nudge the value half a unit away from zero, then truncate.
	return int( $value + 0.5 ) if ( $value > 0 );
	return int( $value - 0.5 ) if ( $value < 0 );
	return int($value);
}